######################################################
#2014 PA Democratic Primary Election Analysis.
# When Women Run, Voters Will Follow (Sometimes): 
##Examining the Mobilizing Effect of Female Candidates in the 2014 and 2018 Midterm Elections 
#By Safarpour, Wyckoff Gaynor, Rouse, and Swers #
######################################################

# Start from an empty workspace: remove every object in the current
# environment so nothing left over from an earlier session is reused below.
rm(list = ls())

#Packages.
library(mvtnorm) 
library(stargazer)
library(stats)
library(arm)
library(plyr)
library(Hmisc)
library(dplyr)

#Setwd.
# The replication folder is an absolute path on the original author's machine.
# Only switch to it when it actually exists; otherwise keep the current working
# directory so the script can be run from wherever the replication files were
# unpacked instead of stopping with a "cannot change working directory" error.
replication_dir <- "/Users/ACS/Dropbox/When Women Run/Revision_PoliticalBehavior/R&R Part 2/Publication Docs/Replication Data and Code/"
if (dir.exists(replication_dir)) {
  setwd(replication_dir)
} else {
  message("Replication folder not found; using current working directory: ",
          getwd())
}

#Data.
# Read the input file; character columns are kept as character (no factors).
DEM <- read.csv("PA2014PrimaryDataFinal.csv",
                header = TRUE, stringsAsFactors = FALSE)

# Row count check (length of column X).
length(DEM$X) #should be 3733758.

#Model.
# Logistic regression of 2014 primary turnout. The formula is spelled out as
# main effects followed by the womanprimarydem interactions, which is the order
# in which R lays out the coefficients: (Intercept), the seven main effects,
# then the four interaction terms. The simulation code further down indexes
# the coefficients by these positions (1-12).
m1 <- glm(
  voted2014primary ~ womanprimarydem + female2 +
    Millennials2014 + GenerationX2014 + SilentGeneration2014 +
    voted2012 + obamavote2012 +
    womanprimarydem:female2 +
    womanprimarydem:Millennials2014 +
    womanprimarydem:GenerationX2014 +
    womanprimarydem:SilentGeneration2014,
  family = binomial(link = "logit"),
  data = DEM
)
summary(m1)


# Regression table written to an HTML file. The labels are listed in the same
# order as the model coefficients (main effects first, then interactions).
term_labels <- c(
  "Female Candidate",
  "Female",
  "Millennials",
  "Generation X",
  "Silent Generation",
  "Voted in 2012 General Election",
  "County Obama Vote 2012",
  "Female Candidate*Female",
  "Female Candidate*Millennials",
  "Female Candidate*Generation X",
  "Female Candidate*Silent Generation"
)
table_title <- "Effects of candidate gender, generation, gender,  party, prior voting, and county Obama vote share on 2014 Primary Turnout in Pennsylvania"
table_notes <- "Results from logistic regression. Standard errors in parentheses. Baseline age category: Baby Boomers. Model excludes registrants with registration date after deadline to vote in primary who do not have a vote history for this election. Model excludes districts 15 and 18."

stargazer(
  m1,
  type = "html",
  out = "PA2014PrimaryModelResults.html",
  covariate.labels = term_labels,
  dep.var.labels = "Voted in PA 2014 Democratic Primary Election",
  title = table_title,
  notes = table_notes,
  notes.append = TRUE,
  notes.align = "l",
  digits = 3,
  single.row = TRUE
)



#Compute Predicted Probs.
# Regression sample: rows of DEM with no missing value in the outcome or in
# any covariate that enters m1.
model_vars <- c("voted2014primary", "womanprimarydem", "SilentGeneration2014",
                "Millennials2014", "GenerationX2014", "female2",
                "voted2012", "obamavote2012")
sample <- DEM[complete.cases(DEM[model_vars]), ]
rm(DEM) #drop full dataset.

# Fitted probabilities for the observations used in the model.
preds <- predict(m1, type = "response")
options(digits = 4)
summary(preds) #mean turnout.

# Draw simulated coefficient vectors from a multivariate normal centred on the
# point estimates with the estimated variance-covariance matrix.
n_draws <- 1000
set.seed(1714)
vcov <- vcov(m1)
coef <- coef(m1)
sim_coefs <- rmvnorm(n = n_draws, mean = coef, sigma = vcov) #Specify the 1,000 simulated coefficients.

# Point estimates (row 1) against the column means of the draws (row 2).
rbind(coef(m1), apply(sim_coefs, 2, mean)) # Check they are close to original

# Average predicted turnout for each simulated coefficient draw.
#
# data  : data frame holding female2, Millennials2014, GenerationX2014,
#         SilentGeneration2014, voted2012 and obamavote2012.
# woman : value (0 or 1) assigned to womanprimarydem for every row of `data`.
# coefs : matrix of simulated coefficients, one draw per row. Columns are used
#         by position: 1 intercept, 2 womanprimarydem, 3 female2,
#         4 Millennials2014, 5 GenerationX2014, 6 SilentGeneration2014,
#         7 voted2012, 8 obamavote2012, and 9-12 the interactions of
#         womanprimarydem with female2, Millennials2014, GenerationX2014 and
#         SilentGeneration2014.
#
# Returns a numeric vector with one element per draw: the mean over the rows of
# `data` of the inverse-logit of the linear predictor, with every other
# covariate kept at its observed value.
avg_turnout_draws <- function(data, woman, coefs = sim_coefs) {
  vapply(seq_len(nrow(coefs)), function(i) {
    b <- coefs[i, ]
    # Terms are added in a fixed order so the floating-point result does not
    # depend on which subgroup or scenario is being evaluated.
    mean(invlogit(b[[1]] + b[[2]] * woman +
                    b[[3]] * data$female2 +
                    b[[4]] * data$Millennials2014 +
                    b[[5]] * data$GenerationX2014 +
                    b[[6]] * data$SilentGeneration2014 +
                    b[[7]] * data$voted2012 +
                    b[[8]] * data$obamavote2012 +
                    b[[9]] * data$female2 * woman +
                    b[[10]] * data$Millennials2014 * woman +
                    b[[11]] * data$GenerationX2014 * woman +
                    b[[12]] * data$SilentGeneration2014 * woman))
  }, numeric(1))
}

# Whole regression sample: womanprimarydem set to 0, then to 1, for everyone.
ppwomen0.s <- avg_turnout_draws(sample, woman = 0)
mean(ppwomen0.s) #

ppwomen1.s <- avg_turnout_draws(sample, woman = 1)
mean(ppwomen1.s) #

effect.01 <- ppwomen1.s - ppwomen0.s
summary(effect.01) # (0-1 women candidate effect)

# CIs and put results together in matrix
elements <- list(ppwomen0.s, ppwomen1.s, effect.01)
lapply(elements, summary) # See summary for each element of the list
ci <- do.call("rbind", lapply(elements, quantile, c(.025, .975))) # Get quantiles for each

# vapply() keeps the means as a plain numeric vector; binding a list here would
# turn the whole matrix into list mode instead of numeric.
results <- cbind(ci[, 1], vapply(elements, mean, numeric(1)), ci[, 2])
colnames(results) <- c("2.5", "Mean", "97.5")
rownames(results) <- c("No Women Ran", "Female Candidate", "Effect No Women-Female Candidate")
results

##Get Predictions by Generation.
SG <- subset(sample, SilentGeneration2014 == 1)
BB <- subset(sample, BabyBoomers2014 == 1)
GX <- subset(sample, GenerationX2014 == 1)
M <- subset(sample, Millennials2014 == 1)

w0.SG <- avg_turnout_draws(SG, woman = 0)
w1.SG <- avg_turnout_draws(SG, woman = 1)
w0.BB <- avg_turnout_draws(BB, woman = 0)
w1.BB <- avg_turnout_draws(BB, woman = 1)
w0.GX <- avg_turnout_draws(GX, woman = 0)
w1.GX <- avg_turnout_draws(GX, woman = 1)
w0.M <- avg_turnout_draws(M, woman = 0)
w1.M <- avg_turnout_draws(M, woman = 1)

effect.SG <- w1.SG - w0.SG
quantile(effect.SG, c(.025, .975))
mean(effect.SG)

effect.BB <- w1.BB - w0.BB
quantile(effect.BB, c(.025, .975))
mean(effect.BB)

effect.GX <- w1.GX - w0.GX
quantile(effect.GX, c(.025, .975))
mean(effect.GX)

effect.M <- w1.M - w0.M
quantile(effect.M, c(.025, .975))
mean(effect.M)
rm(SG, BB, GX, M) #drop whats no longer needed.

#Effect by gender subgroup.
Men <- subset(sample, female2 == 0)
Women <- subset(sample, female2 == 1)

w0.Men <- avg_turnout_draws(Men, woman = 0)
w1.Men <- avg_turnout_draws(Men, woman = 1)
w0.Women <- avg_turnout_draws(Women, woman = 0)
w1.Women <- avg_turnout_draws(Women, woman = 1)

effect.Men <- w1.Men - w0.Men
quantile(effect.Men, c(.025, .975))
mean(effect.Men)

effect.Women <- w1.Women - w0.Women
quantile(effect.Women, c(.025, .975))
mean(effect.Women)

rm(Men, Women) #drop whats no longer needed.

#Store Effects.
# Simulated effects in output row order, skipping the Gen. Z row: no Gen. Z
# effect is computed in this script, so that row is filled with NA below.
effect_draws <- list(effect.01, effect.Women, effect.Men,
                     effect.M, effect.GX, effect.BB, effect.SG)
rows_before_gen_z <- 3 # Overall, Women, Men come before the NA placeholder

Election <- rep("2014 Primary (D)", 8)
Effect <- append(vapply(effect_draws, mean, numeric(1)),
                 NA, after = rows_before_gen_z)
LowerCI <- append(unlist(lapply(effect_draws, quantile, probs = .025)),
                  NA, after = rows_before_gen_z)
UpperCI <- append(unlist(lapply(effect_draws, quantile, probs = .975)),
                  NA, after = rows_before_gen_z)

PAprimary <- data.frame(Effect, UpperCI, LowerCI, Election)
subgroup_labels <- c("Overall", "Women", "Men", "Gen. Z", "Millennials",
                     "Gen. X", "Baby\nBoomers", "Silent Gen.")
PAprimary$`Registrant Subgroup` <- factor(seq_len(8), levels = seq_len(8),
                                          labels = subgroup_labels,
                                          ordered = TRUE)
#write effects to data frame.
write.csv(PAprimary, "PA2014PrimaryEffects.csv", row.names = FALSE)
